# Show the working directory; the relative path passed to load() below is
# resolved against it. Knitted output is kept on `##` lines.
getwd()
## [1] "/Users/mac/FINAL_PROJECT/postpartum_depression_prediction/P3"
# NOTE(review): the .Rdata file is expected to provide `df_tidy` -- confirm.
load('../../data/tidy_data.Rdata')
pp_sad <- df_tidy # give the data a more meaningful name
# Inspect the column types. This call had been swallowed by the trailing
# comment on the assignment line, so it never ran.
glimpse(pp_sad)
## Observations: 44
## Variables: 21
## $ age <dbl> 32, 34, 29, 29, 32, 30, 36, 36, 31, 34, ...
## $ education_level <dbl> 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2...
## $ nationality <dbl> 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1...
## $ postpartum_depression <dbl> 1, 2, 1, 1, 2, 1, 2, 2, 1, 1, 1, 1, 1, 2...
## $ employed <dbl> 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1...
## $ occupation <dbl> 2, 1, 1, 1, 2, 2, 3, 3, 3, 7, 2, 1, 6, 4...
## $ sport <dbl> 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0...
## $ pet <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1...
## $ dyed_hair <dbl> 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0...
## $ first_pregnancy <dbl> 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0...
## $ wanted_pregnancy <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1...
## $ pregnancy_method <dbl> 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1...
## $ previous_miscarriage <dbl> 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1...
## $ fetus_sex <int> 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1...
## $ epds <dbl> 8, 15, 5, 4, 12, 3, 24, 10, 3, 6, 3, 3, ...
## $ depression_tri1 <dbl> 30.00000, 97.00000, 82.00000, 92.00000, ...
## $ depression_tri2 <dbl> 35.00000, 85.00000, 75.00000, 90.00000, ...
## $ depression_tri3 <dbl> 35.00000, 65.00000, 60.00000, 90.00000, ...
## $ cortisol_tri1 <dbl> 282.4000, 362.3000, 124.7000, 113.4900, ...
## $ cortisol_tri2 <dbl> 246.0000, 156.1000, 171.5000, 167.1000, ...
## $ cortisol_tri3 <dbl> 295.6000, 539.8000, 318.1000, 192.2000, ...
# summary(pp_sad)
# EPDS score distribution, one density per postpartum outcome.
# 2 = women with postpartum depression symptoms; 1 = women with no postpartum depression symptoms
pp_sad %>%
  ggplot() +
  # The outcome is stored as a number, so it is converted to a factor to get
  # one discrete fill colour per group (as the trimester plots further down do).
  # alpha is a constant, so it is set outside aes(); `bins` was dropped because
  # geom_density() has no such aesthetic and only warned about it.
  geom_density(
    aes(
      x = epds,
      group = postpartum_depression,
      fill = as.factor(postpartum_depression)
    ),
    alpha = 0.3
  ) +
  labs(x = "Edinburgh Postnatal Depression Scale") +
  ggtitle('Depression populations by postpartum outcome')
# Overall EPDS score distribution with all women pooled.
# alpha is a constant, so it is set outside aes() and applied literally.
pp_sad %>%
  ggplot() +
  geom_density(aes(x = epds, fill = 'total'), alpha = 0.3) +
  labs(x = "Edinburgh Postnatal Depression Scale") +
  ggtitle('Depression score distribution')

# The accuracy of the tables seems to depend on the trimester in which the
# test is taken: they are more accurate in the third trimester than in the
# first, which is to be expected. However, the means do not seem to vary much
# even in the third trimester, so perhaps this is not a very accurate method
# regardless. Trimester 2 seems to be the most influential predictor.
# Variation of depression scores across trimesters, separated by whether or
# not the woman had postpartum depression.
# 2 = women with postpartum depression symptoms; 1 = women with no postpartum depression symptoms
# Each plot is its own statement (they were fused onto a single line, which
# does not parse), and the constant alpha is set outside aes().

# All three trimesters overlaid, one facet per outcome.
pp_sad %>%
  ggplot() +
  geom_density(aes(x = depression_tri1, fill = 'tri1'), alpha = 0.3) +
  geom_density(aes(x = depression_tri2, fill = 'tri2'), alpha = 0.3) +
  geom_density(aes(x = depression_tri3, fill = 'tri3'), alpha = 0.3) +
  facet_wrap(~postpartum_depression) +
  ggtitle('trimester depression populations by postpartum outcome') +
  xlab('depression score')

# One plot per trimester, outcomes overlaid.
pp_sad %>%
  ggplot() +
  geom_density(
    aes(
      x = depression_tri1,
      group = as.factor(postpartum_depression),
      fill = as.factor(postpartum_depression)
    ),
    alpha = 0.25
  ) +
  ggtitle('first trimester depression populations by postpartum outcome')

pp_sad %>%
  ggplot() +
  geom_density(
    aes(
      x = depression_tri2,
      group = as.factor(postpartum_depression),
      fill = as.factor(postpartum_depression)
    ),
    alpha = 0.25
  ) +
  ggtitle('second trimester depression populations by postpartum outcome')

pp_sad %>%
  ggplot() +
  geom_density(
    aes(
      x = depression_tri3,
      group = as.factor(postpartum_depression),
      fill = as.factor(postpartum_depression)
    ),
    alpha = 0.25
  ) +
  ggtitle('third trimester depression populations by postpartum outcome')

# Violin view of the same three trimesters, grouped by outcome.
pp_sad %>%
  ggplot() +
  geom_violin(
    aes(
      x = postpartum_depression,
      y = depression_tri1,
      group = postpartum_depression,
      fill = 'tri1'
    ),
    alpha = 0.3
  ) +
  geom_violin(
    aes(
      x = postpartum_depression,
      y = depression_tri2,
      group = postpartum_depression,
      fill = 'tri2'
    ),
    alpha = 0.3
  ) +
  geom_violin(
    aes(
      x = postpartum_depression,
      y = depression_tri3,
      group = postpartum_depression,
      fill = 'tri3'
    ),
    alpha = 0.3
  ) +
  ggtitle('trimester depression populations by postpartum outcome')

#same info shown in boxplots
#ggplot(pp_sad,aes(x=postpartum_depression,y=depression_tri1,group=postpartum_depression,fill=as.factor(postpartum_depression)))+
# geom_boxplot()+
#theme_gray()
#ggplot(pp_sad,aes(x=postpartum_depression,y=depression_tri2,group=postpartum_depression,fill=as.factor(postpartum_depression)))+
# geom_boxplot()+
#theme_gray()
#ggplot(pp_sad,aes(x=postpartum_depression,y=depression_tri3,group=postpartum_depression,fill=as.factor(postpartum_depression)))+
# geom_boxplot()+
#theme_gray()

# Cortisol levels across trimesters by postpartum outcome. These plots had
# been fused onto the last commented-out line above, so they never ran; each
# is now its own statement, with the constant alpha set outside aes().

# All three trimesters overlaid, one facet per outcome.
pp_sad %>%
  ggplot() +
  geom_density(aes(x = cortisol_tri1, fill = 'tri1'), alpha = 0.3) +
  geom_density(aes(x = cortisol_tri2, fill = 'tri2'), alpha = 0.3) +
  geom_density(aes(x = cortisol_tri3, fill = 'tri3'), alpha = 0.3) +
  facet_wrap(~postpartum_depression) +
  ggtitle('trimester cortisol populations by postpartum outcome') +
  xlab('cortisol level')

# One plot per trimester, outcomes overlaid.
pp_sad %>%
  ggplot() +
  geom_density(
    aes(
      x = cortisol_tri1,
      group = as.factor(postpartum_depression),
      fill = as.factor(postpartum_depression)
    ),
    alpha = 0.25
  ) +
  ggtitle('First trimester cortisol populations by postpartum outcome')

pp_sad %>%
  ggplot() +
  geom_density(
    aes(
      x = cortisol_tri2,
      group = as.factor(postpartum_depression),
      fill = as.factor(postpartum_depression)
    ),
    alpha = 0.25
  ) +
  ggtitle('Second trimester cortisol populations by postpartum outcome')

pp_sad %>%
  ggplot() +
  geom_density(
    aes(
      x = cortisol_tri3,
      group = as.factor(postpartum_depression),
      fill = as.factor(postpartum_depression)
    ),
    alpha = 0.25
  ) +
  ggtitle('Third trimester cortisol populations by postpartum outcome')

# It seemed important to investigate other sources of cortisol increase, as
# the stress of being employed or not seemed likely to affect both the
# cortisol levels of the individual and the related depression scores.
# Those two parameters were therefore compared against employment status.
# However, neither difference was very drastic.
# Cortisol and depression scores per trimester, split by employment status.
# Each plot is its own statement; they were previously fused together
# (`theme_gray()ggplot(...`), which does not parse.

# Cortisol by employment status.
ggplot(
  pp_sad,
  aes(x = employed, y = cortisol_tri1, group = employed, fill = as.factor(employed))
) +
  geom_boxplot() +
  ggtitle('first trimester cortisol populations by employment status') +
  theme_gray()

ggplot(
  pp_sad,
  aes(x = employed, y = cortisol_tri2, group = employed, fill = as.factor(employed))
) +
  geom_boxplot() +
  ggtitle('second trimester cortisol populations by employment status') +
  theme_gray()

ggplot(
  pp_sad,
  aes(x = employed, y = cortisol_tri3, group = employed, fill = as.factor(employed))
) +
  geom_boxplot() +
  ggtitle('third trimester cortisol populations by employment status') +
  theme_gray()

# Depression score by employment status.
ggplot(
  pp_sad,
  aes(x = employed, y = depression_tri1, group = employed, fill = as.factor(employed))
) +
  geom_boxplot() +
  ggtitle('First trimester depression populations by employment status') +
  theme_gray()

ggplot(
  pp_sad,
  aes(x = employed, y = depression_tri2, group = employed, fill = as.factor(employed))
) +
  geom_boxplot() +
  ggtitle('Second trimester depression populations by employment status') +
  theme_gray()

ggplot(
  pp_sad,
  aes(x = employed, y = depression_tri3, group = employed, fill = as.factor(employed))
) +
  geom_boxplot() +
  ggtitle('Third trimester depression populations by employment status') +
  theme_gray()

# We also investigated the relationship between job type and both cortisol
# and depression scores, as we thought these two would affect the results.
# There does seem to be some correlation between having a job and the
# depression score, but not so much with cortisol levels. (1 = unemployment)
# Depression and cortisol scores per trimester, split by occupation code.
# Each plot is its own statement; they were previously fused together
# (`theme_gray()ggplot(...`), which does not parse.

# Depression score by occupation.
ggplot(
  pp_sad,
  aes(x = occupation, y = depression_tri1, group = occupation, fill = as.factor(occupation))
) +
  geom_boxplot() +
  ggtitle('First trimester depression populations by employment type') +
  theme_gray()

ggplot(
  pp_sad,
  aes(x = occupation, y = depression_tri2, group = occupation, fill = as.factor(occupation))
) +
  geom_boxplot() +
  ggtitle('Second trimester depression populations by employment type') +
  theme_gray()

ggplot(
  pp_sad,
  aes(x = occupation, y = depression_tri3, group = occupation, fill = as.factor(occupation))
) +
  geom_boxplot() +
  ggtitle('Third trimester depression populations by employment type') +
  theme_gray()

# Cortisol by occupation.
ggplot(
  pp_sad,
  aes(x = occupation, y = cortisol_tri1, group = occupation, fill = as.factor(occupation))
) +
  geom_boxplot() +
  ggtitle('First trimester cortisol populations by employment type') +
  theme_gray()

ggplot(
  pp_sad,
  aes(x = occupation, y = cortisol_tri2, group = occupation, fill = as.factor(occupation))
) +
  geom_boxplot() +
  ggtitle('Second trimester cortisol populations by employment type') +
  theme_gray()

ggplot(
  pp_sad,
  aes(x = occupation, y = cortisol_tri3, group = occupation, fill = as.factor(occupation))
) +
  geom_boxplot() +
  ggtitle('Third trimester cortisol populations by employment type') +
  theme_gray()

# While employment type doesn't seem to have too much of an effect on
# postpartum depression, it would seem that being unemployed does.
# Number of women per occupation code, stacked by postpartum outcome.
# geom_bar() counts rows. The previous geom_col() with
# y = postpartum_depression summed the 1/2 outcome codes, so each woman with
# symptoms (coded 2) was drawn twice as tall as one without (coded 1).
# NOTE(review): re-check the narrative conclusions against the count-based chart.
ggplot(pp_sad, aes(x = occupation, fill = as.factor(postpartum_depression))) +
  geom_bar() +
  ggtitle('Employment type by postpartum depression') +
  theme_gray()

# It seems that there could be some correlation between being employed and
# being less likely to be affected by postpartum depression; however, the
# sample size for employed women is very small, so this observation could
# normalise with a larger data set.
# Number of women per employment status, stacked by postpartum outcome.
# geom_bar() counts rows. The previous geom_col() with
# y = postpartum_depression summed the 1/2 outcome codes, so each woman with
# symptoms (coded 2) was drawn twice as tall as one without (coded 1).
# NOTE(review): re-check the narrative conclusions against the count-based chart.
ggplot(pp_sad, aes(x = employed, fill = as.factor(postpartum_depression))) +
  geom_bar() +
  ggtitle('Employment status by postpartum depression') +
  labs(x = "unemployed / employed") +
  theme_gray()

# This seemed to have remarkably little effect.
# Number of women per wanted-pregnancy flag, stacked by postpartum outcome.
# geom_bar() counts rows. The previous geom_col() with
# y = postpartum_depression summed the 1/2 outcome codes, so each woman with
# symptoms (coded 2) was drawn twice as tall as one without (coded 1).
# The title typo ('Disire') is corrected as well.
# NOTE(review): re-check the narrative conclusions against the count-based chart.
ggplot(pp_sad, aes(x = wanted_pregnancy, fill = as.factor(postpartum_depression))) +
  geom_bar() +
  ggtitle('Desire for pregnancy by postpartum depression') +
  theme_gray()

# This would seem to show that perhaps women on their first pregnancy are
# more likely to be affected by postpartum depression; however, again this
# may be due to the sample size.
# Number of women per first-pregnancy flag, stacked by postpartum outcome.
# geom_bar() counts rows. The previous geom_col() with
# y = postpartum_depression summed the 1/2 outcome codes, so each woman with
# symptoms (coded 2) was drawn twice as tall as one without (coded 1).
# NOTE(review): re-check the narrative conclusions against the count-based chart.
ggplot(pp_sad, aes(x = first_pregnancy, fill = as.factor(postpartum_depression))) +
  geom_bar() +
  ggtitle('First Pregnancy by postpartum depression') +
  theme_gray()

# We investigated the effect of fetal sex on postpartum depression and found
# the possible correlation between fetal sex and depression mentioned in the
# paper. It does seem, though, that the cortisol levels equalise by the third
# trimester.
# Number of women per fetus sex, stacked by postpartum outcome.
# geom_bar() counts rows. The previous geom_col() with
# y = postpartum_depression summed the 1/2 outcome codes, so each woman with
# symptoms (coded 2) was drawn twice as tall as one without (coded 1).
# NOTE(review): re-check the narrative conclusions against the count-based chart.
ggplot(pp_sad, aes(x = fetus_sex, fill = as.factor(postpartum_depression))) +
  geom_bar() +
  ggtitle('Sex of fetus by postpartum depression') +
  theme_gray()

# Cortisol distribution per trimester, split by fetus sex. Each plot is its
# own statement (they were fused together, which does not parse), and the
# constant alpha is set outside aes().
pp_sad %>%
  ggplot() +
  geom_density(
    aes(x = cortisol_tri1, group = as.factor(fetus_sex), fill = as.factor(fetus_sex)),
    alpha = 0.25
  ) +
  ggtitle('First trimester cortisol populations by fetus sex')

pp_sad %>%
  ggplot() +
  geom_density(
    aes(x = cortisol_tri2, group = as.factor(fetus_sex), fill = as.factor(fetus_sex)),
    alpha = 0.25
  ) +
  ggtitle('Second trimester cortisol populations by fetus sex')

pp_sad %>%
  ggplot() +
  geom_density(
    aes(x = cortisol_tri3, group = as.factor(fetus_sex), fill = as.factor(fetus_sex)),
    alpha = 0.25
  ) +
  ggtitle('Third trimester cortisol populations by fetus sex')

# This was interesting because it switches to a positive correlation in the
# last trimester. Also, there does not seem to be much difference between the
# points, and the regression line is being pulled by several outliers.
# Depression score against cortisol level for each trimester, with a linear
# fit. Each plot is its own statement; they were previously fused together
# (`ggtitle(...)ggplot(...`), which does not parse.
# The outcome is deliberately left numeric for the colour mapping: that keeps
# a single regression line per plot, which is what the narrative discusses.
ggplot(
  pp_sad,
  aes(x = depression_tri1, y = cortisol_tri1, color = postpartum_depression)
) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  ggtitle('Linear relationship between depression and cortisol in the first trimester')

ggplot(
  pp_sad,
  aes(x = depression_tri2, y = cortisol_tri2, color = postpartum_depression)
) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  ggtitle('Linear relationship between depression and cortisol in the second trimester')

ggplot(
  pp_sad,
  aes(x = depression_tri3, y = cortisol_tri3, color = postpartum_depression)
) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  ggtitle('Linear relationship between depression and cortisol in the third trimester')

# There is significant collinearity between the biological and psychological
# metrics, which makes the validity of biological factors as a causal effect
# of postpartum depression questionable.
# Pearson correlation heatmap of the selected numeric columns.
# we should really only check continuous variables
.cor <- pp_sad %>%
  select(
    starts_with('cortisol'),
    starts_with('depression'),
    age,
    epds,
    education_level,
    fetus_sex,
    previous_miscarriage
  ) %>%
  cor()
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
# Reshape the correlation matrix to long form (Var1, Var2, value) for geom_tile().
melted_sad <- melt(.cor)
ggplot(data = melted_sad, aes(x = Var1, y = Var2, fill = value)) +
  geom_tile() +
  # `limits` is spelled out in full; the earlier `limit =` only worked through
  # partial argument matching.
  scale_fill_gradient2(
    low = "blue",
    high = "red",
    mid = "white",
    midpoint = 0,
    limits = c(-1, 1),
    space = "Lab",
    name = "Pearson\nCorrelation"
  ) +
  theme(axis.text.x = element_text(angle = 60, vjust = 1, size = 12, hjust = 1))